{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "bdaa8bf6-3d0c-43fb-bac0-7dd5329f053a",
   "metadata": {},
   "source": [
    "##### ❇️ Pandas 🐼 Scraping data from Wikipedia"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "01892803-208f-438e-b75e-aa4c1ff8b0a7",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "6ee474c6-9254-4ca0-9342-46f515d57727",
   "metadata": {},
   "outputs": [],
   "source": [
    "tables = pd.read_html('https://en.wikipedia.org/wiki/Roger_Federer')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "491ba7e8-5442-4558-8b5e-b904f1dceafb",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Tournament</th>\n",
       "      <th>1999</th>\n",
       "      <th>2000</th>\n",
       "      <th>2001</th>\n",
       "      <th>2002</th>\n",
       "      <th>2003</th>\n",
       "      <th>2004</th>\n",
       "      <th>2005</th>\n",
       "      <th>2006</th>\n",
       "      <th>2007</th>\n",
       "      <th>...</th>\n",
       "      <th>2015</th>\n",
       "      <th>2016</th>\n",
       "      <th>2017</th>\n",
       "      <th>2018</th>\n",
       "      <th>2019</th>\n",
       "      <th>2020</th>\n",
       "      <th>2021</th>\n",
       "      <th>SR</th>\n",
       "      <th>W–L</th>\n",
       "      <th>Win %</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Australian Open</td>\n",
       "      <td>Q1</td>\n",
       "      <td>3R</td>\n",
       "      <td>3R</td>\n",
       "      <td>4R</td>\n",
       "      <td>4R</td>\n",
       "      <td>W</td>\n",
       "      <td>SF</td>\n",
       "      <td>W</td>\n",
       "      <td>W</td>\n",
       "      <td>...</td>\n",
       "      <td>3R</td>\n",
       "      <td>SF</td>\n",
       "      <td>W</td>\n",
       "      <td>W</td>\n",
       "      <td>4R</td>\n",
       "      <td>SF</td>\n",
       "      <td>A</td>\n",
       "      <td>6 / 21</td>\n",
       "      <td>102–15</td>\n",
       "      <td>87%</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>French Open</td>\n",
       "      <td>1R</td>\n",
       "      <td>4R</td>\n",
       "      <td>QF</td>\n",
       "      <td>1R</td>\n",
       "      <td>1R</td>\n",
       "      <td>3R</td>\n",
       "      <td>SF</td>\n",
       "      <td>F</td>\n",
       "      <td>F</td>\n",
       "      <td>...</td>\n",
       "      <td>QF</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "      <td>SF</td>\n",
       "      <td>A</td>\n",
       "      <td>4R[B]</td>\n",
       "      <td>1 / 19</td>\n",
       "      <td>73–17</td>\n",
       "      <td>81%</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Wimbledon</td>\n",
       "      <td>1R</td>\n",
       "      <td>1R</td>\n",
       "      <td>QF</td>\n",
       "      <td>1R</td>\n",
       "      <td>W</td>\n",
       "      <td>W</td>\n",
       "      <td>W</td>\n",
       "      <td>W</td>\n",
       "      <td>W[A]</td>\n",
       "      <td>...</td>\n",
       "      <td>F</td>\n",
       "      <td>SF</td>\n",
       "      <td>W</td>\n",
       "      <td>QF</td>\n",
       "      <td>F</td>\n",
       "      <td>NH</td>\n",
       "      <td>QF</td>\n",
       "      <td>8 / 22</td>\n",
       "      <td>105–14</td>\n",
       "      <td>88%</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>US Open</td>\n",
       "      <td>Q2</td>\n",
       "      <td>3R</td>\n",
       "      <td>4R</td>\n",
       "      <td>4R</td>\n",
       "      <td>4R</td>\n",
       "      <td>W[A]</td>\n",
       "      <td>W</td>\n",
       "      <td>W</td>\n",
       "      <td>W</td>\n",
       "      <td>...</td>\n",
       "      <td>F</td>\n",
       "      <td>A</td>\n",
       "      <td>QF</td>\n",
       "      <td>4R</td>\n",
       "      <td>QF</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "      <td>5 / 19</td>\n",
       "      <td>89–14</td>\n",
       "      <td>86%</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Win–loss</td>\n",
       "      <td>0–2</td>\n",
       "      <td>7–4</td>\n",
       "      <td>13–4</td>\n",
       "      <td>6–4</td>\n",
       "      <td>13–3</td>\n",
       "      <td>22–1</td>\n",
       "      <td>24–2</td>\n",
       "      <td>27–1</td>\n",
       "      <td>26–1</td>\n",
       "      <td>...</td>\n",
       "      <td>18–4</td>\n",
       "      <td>10–2</td>\n",
       "      <td>18–1</td>\n",
       "      <td>14–2</td>\n",
       "      <td>18–4</td>\n",
       "      <td>5–1</td>\n",
       "      <td>7–1</td>\n",
       "      <td>20 / 81</td>\n",
       "      <td>369–60</td>\n",
       "      <td>86%</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 27 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "        Tournament 1999 2000  2001 2002  2003  2004  2005  2006  2007  ...  \\\n",
       "0  Australian Open   Q1   3R    3R   4R    4R     W    SF     W     W  ...   \n",
       "1      French Open   1R   4R    QF   1R    1R    3R    SF     F     F  ...   \n",
       "2        Wimbledon   1R   1R    QF   1R     W     W     W     W  W[A]  ...   \n",
       "3          US Open   Q2   3R    4R   4R    4R  W[A]     W     W     W  ...   \n",
       "4         Win–loss  0–2  7–4  13–4  6–4  13–3  22–1  24–2  27–1  26–1  ...   \n",
       "\n",
       "   2015  2016  2017  2018  2019 2020   2021       SR     W–L Win %  \n",
       "0    3R    SF     W     W    4R   SF      A   6 / 21  102–15   87%  \n",
       "1    QF     A     A     A    SF    A  4R[B]   1 / 19   73–17   81%  \n",
       "2     F    SF     W    QF     F   NH     QF   8 / 22  105–14   88%  \n",
       "3     F     A    QF    4R    QF    A      A   5 / 19   89–14   86%  \n",
       "4  18–4  10–2  18–1  14–2  18–4  5–1    7–1  20 / 81  369–60   86%  \n",
       "\n",
       "[5 rows x 27 columns]"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Federer's performance timeline in Grand Slams\n",
    "performance_in_GS = tables[3]\n",
    "performance_in_GS"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "c13336e7-c93d-47a7-aec7-791a322ad69f",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Result</th>\n",
       "      <th>Year</th>\n",
       "      <th>Tournament</th>\n",
       "      <th>Surface</th>\n",
       "      <th>Opponent</th>\n",
       "      <th>Score</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Win</td>\n",
       "      <td>2003</td>\n",
       "      <td>Wimbledon</td>\n",
       "      <td>Grass</td>\n",
       "      <td>Mark Philippoussis</td>\n",
       "      <td>7–6(7–5), 6–2, 7–6(7–3)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Win</td>\n",
       "      <td>2004</td>\n",
       "      <td>Australian Open</td>\n",
       "      <td>Hard</td>\n",
       "      <td>Marat Safin</td>\n",
       "      <td>7–6(7–3), 6–4, 6–2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Win</td>\n",
       "      <td>2004</td>\n",
       "      <td>Wimbledon (2)</td>\n",
       "      <td>Grass</td>\n",
       "      <td>Andy Roddick</td>\n",
       "      <td>4–6, 7–5, 7–6(7–3), 6–4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Win</td>\n",
       "      <td>2004</td>\n",
       "      <td>US Open</td>\n",
       "      <td>Hard</td>\n",
       "      <td>Lleyton Hewitt</td>\n",
       "      <td>6–0, 7–6(7–3), 6–0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Win</td>\n",
       "      <td>2005</td>\n",
       "      <td>Wimbledon (3)</td>\n",
       "      <td>Grass</td>\n",
       "      <td>Andy Roddick</td>\n",
       "      <td>6–2, 7–6(7–2), 6–4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>Win</td>\n",
       "      <td>2005</td>\n",
       "      <td>US Open (2)</td>\n",
       "      <td>Hard</td>\n",
       "      <td>Andre Agassi</td>\n",
       "      <td>6–3, 2–6, 7–6(7–1), 6–1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>Win</td>\n",
       "      <td>2006</td>\n",
       "      <td>Australian Open (2)</td>\n",
       "      <td>Hard</td>\n",
       "      <td>Marcos Baghdatis</td>\n",
       "      <td>5–7, 7–5, 6–0, 6–2</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  Result  Year           Tournament Surface            Opponent  \\\n",
       "0    Win  2003            Wimbledon   Grass  Mark Philippoussis   \n",
       "1    Win  2004      Australian Open    Hard         Marat Safin   \n",
       "2    Win  2004        Wimbledon (2)   Grass        Andy Roddick   \n",
       "3    Win  2004              US Open    Hard      Lleyton Hewitt   \n",
       "4    Win  2005        Wimbledon (3)   Grass        Andy Roddick   \n",
       "5    Win  2005          US Open (2)    Hard        Andre Agassi   \n",
       "6    Win  2006  Australian Open (2)    Hard    Marcos Baghdatis   \n",
       "\n",
       "                     Score  \n",
       "0  7–6(7–5), 6–2, 7–6(7–3)  \n",
       "1       7–6(7–3), 6–4, 6–2  \n",
       "2  4–6, 7–5, 7–6(7–3), 6–4  \n",
       "3       6–0, 7–6(7–3), 6–0  \n",
       "4       6–2, 7–6(7–2), 6–4  \n",
       "5  6–3, 2–6, 7–6(7–1), 6–1  \n",
       "6       5–7, 7–5, 6–0, 6–2  "
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Result across 31 Grand Slam finals\n",
    "gs_finals = tables[4]\n",
    "gs_finals.head(7)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "68d75007-3200-47e0-9b25-8ea821a09f84",
   "metadata": {},
   "source": [
    "##### ❇️ Hope you enjoyed reading!! 📖 \n",
    "##### ❇️ follow → @akshay_pachaar  "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9a9d4323-897e-467b-9596-f140b9223873",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "env_twitter",
   "language": "python",
   "name": "env_twitter"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
